1 Study Overview

This notebook expands the CAFI 2019 survey analysis pipeline so that it is:

  • Rigorous – all preprocessing is explicit, traceable, and auditable.
  • Statistically sound – models and hypothesis tests are appropriate for the data-generating process.
  • Insight-driven – figures and tables target the key ecological questions: how CAFI communities vary across reefs, morphotypes, colony size, and surrounding coral habitat.

Field sampling followed the June–July 2019 neighborhood survey described in the project overview. We pair focal colony measurements with CAFI community composition and physiological data to identify drivers of abundance, diversity, and taxonomic structure.

2 Load Libraries & Configure Environment

library(conflicted)
library(here)
library(tidyverse)
library(lubridate)
library(janitor)
library(glue)
library(fs)
library(vegan)
library(broom)
library(patchwork)
library(scales)
library(MASS)
library(tibble)

conflicted::conflict_prefer("filter", "dplyr", quiet = TRUE)
conflicted::conflict_prefer("lag", "dplyr", quiet = TRUE)
conflicted::conflict_prefer("select", "dplyr", quiet = TRUE)

theme_set(theme_minimal(base_size = 12))
plot_theme <- theme_minimal(base_size = 12) +
  theme(
    panel.grid.minor = element_blank(),
    legend.position = "top",
    strip.background = element_rect(fill = "#f0f0f0", color = NA),
    strip.text = element_text(face = "bold")
  )

morphotype_palette <- c(
  "Tight branching" = "#1b9e77",
  "Wide branching" = "#d95f02",
  "Unknown" = "#7570b3"
)

region_palette <- c(
  "MRB" = "#1b9e77",
  "HAU" = "#d95f02",
  "MAT" = "#7570b3"
)

3 Data Sources & Output Paths

survey_paths <- list(
  characteristics = here("data", "Survey", "1. survey_coral_characteristics_merged_v2.csv"),
  cafi = here("data", "Survey", "1. survey_cafi_data_w_taxonomy_summer2019_v5.csv"),
  phys = here("data", "Survey", "1. survey_master_phys_data_v3.csv")
)

purrr::iwalk(
  survey_paths,
  ~{
    if (!fs::file_exists(.x)) {
      stop(glue("Missing expected file for {.y}: {.x}"), call. = FALSE)
    }
  }
)

output_dir <- here("output", "survey")
figure_dir <- here(output_dir, "figures")
table_dir <- here(output_dir, "tables")

fs::dir_create(c(output_dir, figure_dir, table_dir))

4 Import Raw Data

characteristics_raw <- readr::read_csv(
  survey_paths$characteristics,
  na = c("", "NA", "NaN"),
  show_col_types = FALSE
)

cafi_raw <- readr::read_csv(
  survey_paths$cafi,
  na = c("", "NA", "NaN"),
  show_col_types = FALSE
)

phys_raw <- readr::read_csv(
  survey_paths$phys,
  na = c("", "NA", "NaN"),
  show_col_types = FALSE
)

list(
  characteristics = characteristics_raw,
  cafi = cafi_raw,
  physiology = phys_raw
) %>%
  purrr::imap(~{
    message("Rows in ", .y, ": ", nrow(.x))
    invisible(NULL)
  })
## $characteristics
## NULL
## 
## $cafi
## NULL
## 
## $physiology
## NULL

5 Data Preparation

5.1 Focal Colony Attributes

characteristics <- characteristics_raw %>%
  janitor::clean_names() %>%
  mutate(
    sample_date = suppressWarnings(mdy(date)),
    branch_architecture_raw = branch_width,
    morphotype = case_when(
      branch_width %in% c("tight", "tight_branching") ~ "Tight branching",
      branch_width %in% c("wide", "wide_branching") ~ "Wide branching",
      !is.na(morphotype) ~ str_to_sentence(morphotype),
      TRUE ~ "Unknown"
    ),
    morphotype = factor(morphotype, levels = c("Tight branching", "Wide branching", "Unknown")),
    depth = readr::parse_number(as.character(depth)),
    region = str_extract(site, "^[A-Z]+"),
    region = factor(region, levels = c("MRB", "HAU", "MAT")),
    site_block = site,
    coral_volume_cm3 = coalesce(volume_lab, volume_field),
    coral_circumference_cm = coalesce(circ_lab, circ_field),
    coral_height_cm = coalesce(height_lab, height_field),
    coral_length_cm = coalesce(length_lab, length_field),
    coral_width_cm = coalesce(width_lab, width_field),
    neighbor_area_m2 = pi * (2^2), # sampling radius = 2 m
    number_of_neighbors = as.numeric(number_of_neighbors),
    mean_neighbor_distance = as.numeric(mean_neighbor_distance),
    mean_total_volume_of_neighbors = as.numeric(mean_total_volume_of_neighbors),
    combined_total_volume_of_neighbors = as.numeric(combined_total_volume_of_neighbors),
    mean_live_volume_of_neighbors = as.numeric(mean_live_volume_of_neighbors),
    combined_live_volume_of_neighbors = as.numeric(combined_live_volume_of_neighbors),
    neighbor_density = if_else(!is.na(number_of_neighbors), number_of_neighbors / neighbor_area_m2, NA_real_),
    neighbor_total_volume_density = if_else(
      !is.na(combined_total_volume_of_neighbors),
      combined_total_volume_of_neighbors / neighbor_area_m2,
      NA_real_
    ),
    neighbor_live_volume_density = if_else(
      !is.na(combined_live_volume_of_neighbors),
      combined_live_volume_of_neighbors / neighbor_area_m2,
      NA_real_
    ),
    coral_isolation_index = mean_neighbor_distance,
    h_substrate = as.numeric(h_substrate),
    notes = na_if(notes, "")
  ) %>%
  select(
    coral_id, region, site_block, survey_type, sample_date,
    morphotype, branch_architecture_raw, depth,
    coral_length_cm, coral_width_cm, coral_height_cm,
    coral_volume_cm3, coral_circumference_cm, h_substrate,
    number_of_neighbors, neighbor_density,
    mean_neighbor_distance,
    mean_total_volume_of_neighbors, combined_total_volume_of_neighbors,
    mean_live_volume_of_neighbors, combined_live_volume_of_neighbors,
    neighbor_total_volume_density, neighbor_live_volume_density,
    notes
  )

glimpse(characteristics)
## Rows: 114
## Columns: 24
## $ coral_id                           <chr> "MRB-POC01", "MRB-POC02", "MRB-POC0…
## $ region                             <fct> MRB, MRB, MRB, MRB, MRB, MRB, MRB, …
## $ site_block                         <chr> "MRB1", "MRB1", "MRB2", "MRB2", "MR…
## $ survey_type                        <chr> "neighborhood", "neighborhood", "ne…
## $ sample_date                        <date> 2019-06-25, 2019-06-25, 2019-06-26…
## $ morphotype                         <fct> Tight branching, Wide branching, Wi…
## $ branch_architecture_raw            <chr> "tight", "wide", "wide", "tight", "…
## $ depth                              <dbl> 6, 6, 6, 5, 6, 6, 7, 7, 5, 6, 6, 7,…
## $ coral_length_cm                    <dbl> NA, 26, NA, 17, 36, 17, 25, 24, 31,…
## $ coral_width_cm                     <dbl> NA, 20, NA, 15, 22, 14, 21, 22, 24,…
## $ coral_height_cm                    <dbl> NA, 13, NA, 11, 27, 11, 18, 18, 15,…
## $ coral_volume_cm3                   <dbl> NA, 3539.528, NA, 1468.695, 11196.6…
## $ coral_circumference_cm             <dbl> NA, 65, NA, 30, 83, 30, 69, 73, 81,…
## $ h_substrate                        <dbl> 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 1…
## $ number_of_neighbors                <dbl> 22, 21, 53, 66, 24, 38, 28, 36, 14,…
## $ neighbor_density                   <dbl> 1.75070437, 1.67112690, 4.21760599,…
## $ mean_neighbor_distance             <dbl> 137.22727, 88.85714, 113.37736, 128…
## $ mean_total_volume_of_neighbors     <dbl> 22.32435, 985.23836, 113.67033, 642…
## $ combined_total_volume_of_neighbors <dbl> 491.1357, 20690.0056, 6024.5275, 42…
## $ mean_live_volume_of_neighbors      <dbl> 22.31007, 976.43068, 92.82122, 387.…
## $ combined_live_volume_of_neighbors  <dbl> 490.8215, 20505.0444, 4919.5247, 25…
## $ neighbor_total_volume_density      <dbl> 39.08333, 1646.45833, 479.41667, 33…
## $ neighbor_live_volume_density       <dbl> 39.05833, 1631.73958, 391.48333, 20…
## $ notes                              <chr> "slurry not retained - proper mater…

5.2 CAFI Community Records

cafi <- cafi_raw %>%
  janitor::clean_names() %>%
  select(-matches("^(x|X)(\\d+)?$")) %>%
  mutate(
    coral_id = str_trim(coral_id),
    site = na_if(site, ""),
    extraction_method = str_replace_all(extraction_method, "_", " "),
    type = str_replace_all(type, "_", " "),
    type = str_to_title(type),
    measurement = str_replace_all(measurement, "_", " "),
    alt_measurement = str_replace_all(alt_measurement, "_", " "),
    cafi_size_mm = readr::parse_number(as.character(cafi_size_mm)),
    alt_size_mm = readr::parse_number(as.character(alt_size_mm)),
    taxa_lowest = coalesce(
      lowest_level,
      if_else(!is.na(genus) & !is.na(species), str_c(genus, species, sep = " "), NA_character_),
      genus,
      family,
      code
    ),
    taxa_lowest = str_squish(str_replace_all(taxa_lowest, "_", " ")),
    id_certainty = factor(
      id_certainty,
      levels = c("certain", "revision_possible", "uncertain", "unknown")
    ),
    extraction_method = str_squish(extraction_method),
    identification_notes = na_if(identification_notes, ""),
    general_notes = na_if(general_notes, "")
  ) %>%
  filter(!is.na(coral_id)) %>%
  left_join(
    characteristics %>%
      select(coral_id, region, site_block, morphotype),
    by = "coral_id"
  )

glimpse(cafi)
## Rows: 3,989
## Columns: 41
## $ master_sort          <dbl> 2825, 1162, 2777, 2778, 1365, 1170, 1207, 2561, 2…
## $ old_sort             <dbl> 2815, 1152, 2767, 2768, 1355, 1160, 1197, 2551, 2…
## $ coral_id             <chr> "HAU-POC29", "HAU-POC13", "HAU-POC25", "HAU-POC25…
## $ site                 <chr> NA, "HAU04", NA, NA, "MAT01", "HAU05", "HAU05", N…
## $ code                 <chr> "ACHI", "ALAE", "ALAE", "ALAE", "ALCL", "ALCO", "…
## $ type                 <chr> "Crab", "Shrimp", "Shrimp", "Shrimp", "Shrimp", "…
## $ search_term          <chr> "Actaeodes hirsutissimus", "Alpheopsis aequalis",…
## $ lowest_level         <chr> "species", "species", "species", "species", "spec…
## $ cafi_size_mm         <dbl> 10, 9, 8, 10, 14, 16, 11, 13, 15, 16, 16, 15, 17,…
## $ measurement          <chr> "carapace width", "rostrum to tail", "rostrum to …
## $ alt_size_mm          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ alt_measurement      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ extraction_method    <chr> "clove", "clove", "clove", "clove", "clove", "clo…
## $ photo_num            <chr> "Unknown Crab 150", "Unknown Shrimp 367", "Unknow…
## $ id_certainty         <fct> certain, revision_possible, revision_possible, re…
## $ general_notes        <chr> NA, "gravid", NA, NA, NA, NA, "in post smash fold…
## $ identification_notes <chr> "Black setae on carapace, big claws with white sp…
## $ revised_yn           <chr> "y", "y", "y", "y", "y", "y", "y", NA, NA, NA, NA…
## $ revision_date        <chr> "5/22/2020", "30-Oct; 2020-05-11", "31-Oct", "31-…
## $ identified_by        <chr> "JC", "JC", "AP", "AP", "JC", "AP", "AP", NA, NA,…
## $ revised_by           <chr> "JC", "AP; JC; JC", "AP", "AP", "AP; JC; JC", "AP…
## $ previous_name        <chr> "pizza_crab", "tiger_alpheid; ALPC", "possible_AT…
## $ worms_id             <chr> "209051", "220235", "220235", "220235", "210529",…
## $ phylum               <chr> "Arthropoda", "Arthropoda", "Arthropoda", "Arthro…
## $ subphylum            <chr> "Crustacea", "Crustacea", "Crustacea", "Crustacea…
## $ superclass           <chr> "Multicrustacea", "Multicrustacea", "Multicrustac…
## $ class                <chr> "Malacostraca", "Malacostraca", "Malacostraca", "…
## $ subclass             <chr> "Eumalacostraca", "Eumalacostraca", "Eumalacostra…
## $ superorder           <chr> "Eucarida", "Eucarida", "Eucarida", "Eucarida", "…
## $ order                <chr> "Decapoda", "Decapoda", "Decapoda", "Decapoda", "…
## $ suborder             <chr> "Pleocyemata", "Pleocyemata", "Pleocyemata", "Ple…
## $ infraorder           <chr> "Brachyura", "Caridea", "Caridea", "Caridea", "Ca…
## $ superfamily          <chr> "Xanthoidea", "Alpheoidea", "Alpheoidea", "Alpheo…
## $ family               <chr> "Xanthidae", "Alpheidae", "Alpheidae", "Alpheidae…
## $ subfamily            <chr> "Actaeinae", NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ genus                <chr> "Actaeodes", "Alpheopsis", "Alpheopsis", "Alpheop…
## $ species              <chr> "Actaeodes hirsutissimus", "Alpheopsis aequalis",…
## $ taxa_lowest          <chr> "species", "species", "species", "species", "spec…
## $ region               <fct> HAU, HAU, HAU, HAU, MAT, HAU, HAU, HAU, HAU, HAU,…
## $ site_block           <chr> "HAU-S2", "HAU4", "HAU-S1", "HAU-S1", "MAT1", "HA…
## $ morphotype           <fct> Wide branching, Wide branching, Wide branching, W…

5.3 Physiological Measurements

numeric_phys_cols <- c(
  "slurry_volume", "dry_weight", "dip_weight", "wax_weight", "surface_area",
  "protein_absorbance", "protein_absorbance_se", "protein_mg_ml", "protein_se",
  "carb_absorbance", "carb_absorbance_se", "carb_mg_ml", "carb_se",
  "avg_num_zoox_cells", "num_zoox_cells_se",
  "burned_pan", "burned_pan_se", "burned_pan_dry_res", "burned_pan_dry_res_se",
  "dry_tissue", "dry_tissue_se", "burned_pan_burned_res", "burned_pan_burned_res_se",
  "afdw_g_ml", "afdw_se",
  "protein_mg_cm2", "carb_mg_cm2", "zoox_density", "zoox_cells_cm2", "afdw_mg_cm2",
  "protein_mg_cm2_se", "carb_mg_cm2_se", "zoox_density_se", "zoox_cells_cm2_se",
  "afdw_mg_cm2_se"
)

physiology <- phys_raw %>%
  janitor::clean_names() %>%
  select(-matches("^(x|X)(\\d+)?$")) %>%
  mutate(
    slurry_date = suppressWarnings(mdy(slurry_date)),
    dna_collection_date = suppressWarnings(mdy(dna_collection_date)),
    branch_width = str_replace_all(branch_width, "_", " ")
  ) %>%
  mutate(
    across(
      any_of(numeric_phys_cols),
      ~suppressWarnings(as.numeric(.x))
    )
  )

phys_summary <- physiology %>%
  group_by(coral_id) %>%
  summarise(
    n_tissue_samples = n(),
    mean_surface_area_cm2 = mean(surface_area, na.rm = TRUE),
    mean_protein_mg_cm2 = mean(protein_mg_cm2, na.rm = TRUE),
    mean_carb_mg_cm2 = mean(carb_mg_cm2, na.rm = TRUE),
    mean_zoox_density_cells_ml = mean(zoox_density, na.rm = TRUE),
    mean_zoox_cells_cm2 = mean(zoox_cells_cm2, na.rm = TRUE),
    mean_afdw_mg_cm2 = mean(afdw_mg_cm2, na.rm = TRUE),
    .groups = "drop"
  )

glimpse(phys_summary)
## Rows: 108
## Columns: 8
## $ coral_id                   <chr> "HAU-POC01", "HAU-POC02", "HAU-POC03", "HAU…
## $ n_tissue_samples           <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ mean_surface_area_cm2      <dbl> 6615.96, 6214.22, 6547.74, 8961.97, 2045.22…
## $ mean_protein_mg_cm2        <dbl> 0.0004162661, NaN, 0.0031613961, 0.00385897…
## $ mean_carb_mg_cm2           <dbl> 0.0005019176, NaN, 0.0004378101, 0.00060797…
## $ mean_zoox_density_cells_ml <dbl> NaN, 326666.7, 1415000.0, 1083333.3, 163666…
## $ mean_zoox_cells_cm2        <dbl> NaN, 683.3789, 5402.6275, 2659.3855, 1200.3…
## $ mean_afdw_mg_cm2           <dbl> 0.03687296, 0.06234089, 0.04429009, 0.03645…

6 Derived Community Metrics

6.1 Species-by-Colony Matrix & Diversity

cafi_counts <- cafi %>%
  filter(!is.na(taxa_lowest)) %>%
  group_by(coral_id, taxa_lowest) %>%
  summarise(
    individuals = n(),
    mean_size_mm = mean(cafi_size_mm, na.rm = TRUE),
    size_sum_mm = if_else(
      all(is.na(cafi_size_mm)),
      NA_real_,
      sum(cafi_size_mm, na.rm = TRUE)
    ),
    .groups = "drop"
  )

community_matrix_tbl <- cafi_counts %>%
  select(coral_id, taxa_lowest, individuals) %>%
  pivot_wider(
    names_from = taxa_lowest,
    values_from = individuals,
    values_fill = 0
  ) %>%
  arrange(coral_id)

community_matrix <- community_matrix_tbl %>%
  column_to_rownames(var = "coral_id") %>%
  as.matrix()

total_individuals_vec <- rowSums(community_matrix)
richness_vec <- rowSums(community_matrix > 0)
shannon_vec <- vegan::diversity(community_matrix, index = "shannon")
simpson_vec <- vegan::diversity(community_matrix, index = "simpson")
evenness_vec <- if_else(richness_vec > 0, shannon_vec / log(richness_vec), NA_real_)

diversity_df <- tibble(
  coral_id = rownames(community_matrix),
  total_individuals = as.integer(total_individuals_vec),
  species_richness = richness_vec,
  shannon = shannon_vec,
  simpson = simpson_vec,
  pielou_evenness = evenness_vec
) %>%
  arrange(coral_id)

cafi_size_summary <- cafi %>%
  group_by(coral_id) %>%
  summarise(
    mean_cafi_size_mm = mean(cafi_size_mm, na.rm = TRUE),
    median_cafi_size_mm = median(cafi_size_mm, na.rm = TRUE),
    size_sd_mm = sd(cafi_size_mm, na.rm = TRUE),
    .groups = "drop"
  )

cafi_type_summary <- cafi %>%
  filter(!is.na(type)) %>%
  mutate(type_clean = str_replace_all(str_to_lower(type), "[^a-z0-9]+", "_")) %>%
  count(coral_id, type_clean, name = "n") %>%
  mutate(type_clean = str_c("n_type_", type_clean)) %>%
  pivot_wider(
    names_from = type_clean,
    values_from = n,
    values_fill = 0
  )

diversity_df

6.2 Consolidated Analytic Dataset

survey_analytic_df <- characteristics %>%
  left_join(diversity_df, by = "coral_id") %>%
  left_join(cafi_size_summary, by = "coral_id") %>%
  left_join(cafi_type_summary, by = "coral_id") %>%
  left_join(phys_summary, by = "coral_id") %>%
  mutate(
    total_individuals = replace_na(total_individuals, 0L),
    species_richness = replace_na(species_richness, 0),
    shannon = replace_na(shannon, 0),
    simpson = replace_na(simpson, 0),
    pielou_evenness = replace_na(pielou_evenness, 0),
    mean_cafi_size_mm = if_else(is.nan(mean_cafi_size_mm), NA_real_, mean_cafi_size_mm),
    median_cafi_size_mm = if_else(is.nan(median_cafi_size_mm), NA_real_, median_cafi_size_mm),
    size_sd_mm = if_else(is.nan(size_sd_mm), NA_real_, size_sd_mm),
    combined_total_volume_of_neighbors = replace_na(combined_total_volume_of_neighbors, 0),
    combined_live_volume_of_neighbors = replace_na(combined_live_volume_of_neighbors, 0),
    mean_total_volume_of_neighbors = replace_na(mean_total_volume_of_neighbors, 0),
    mean_live_volume_of_neighbors = replace_na(mean_live_volume_of_neighbors, 0),
    number_of_neighbors = replace_na(number_of_neighbors, 0),
    neighbor_density = replace_na(neighbor_density, 0),
    neighbor_total_volume_density = replace_na(neighbor_total_volume_density, 0),
    neighbor_live_volume_density = replace_na(neighbor_live_volume_density, 0),
    coral_volume_cm3 = replace_na(coral_volume_cm3, 0),
    log_coral_volume = log10(coral_volume_cm3 + 1),
    log_total_individuals = log10(total_individuals + 1),
    log_live_neighbor_volume = log10(combined_live_volume_of_neighbors + 1),
    log_neighbor_density = log10(neighbor_density + 1e-6)
  ) %>%
  mutate(across(starts_with("n_type_"), ~replace_na(.x, 0L)))

glimpse(survey_analytic_df)
## Rows: 114
## Columns: 53
## $ coral_id                           <chr> "MRB-POC01", "MRB-POC02", "MRB-POC0…
## $ region                             <fct> MRB, MRB, MRB, MRB, MRB, MRB, MRB, …
## $ site_block                         <chr> "MRB1", "MRB1", "MRB2", "MRB2", "MR…
## $ survey_type                        <chr> "neighborhood", "neighborhood", "ne…
## $ sample_date                        <date> 2019-06-25, 2019-06-25, 2019-06-26…
## $ morphotype                         <fct> Tight branching, Wide branching, Wi…
## $ branch_architecture_raw            <chr> "tight", "wide", "wide", "tight", "…
## $ depth                              <dbl> 6, 6, 6, 5, 6, 6, 7, 7, 5, 6, 6, 7,…
## $ coral_length_cm                    <dbl> NA, 26, NA, 17, 36, 17, 25, 24, 31,…
## $ coral_width_cm                     <dbl> NA, 20, NA, 15, 22, 14, 21, 22, 24,…
## $ coral_height_cm                    <dbl> NA, 13, NA, 11, 27, 11, 18, 18, 15,…
## $ coral_volume_cm3                   <dbl> 0.000, 3539.528, 0.000, 1468.695, 1…
## $ coral_circumference_cm             <dbl> NA, 65, NA, 30, 83, 30, 69, 73, 81,…
## $ h_substrate                        <dbl> 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 1…
## $ number_of_neighbors                <dbl> 22, 21, 53, 66, 24, 38, 28, 36, 14,…
## $ neighbor_density                   <dbl> 1.75070437, 1.67112690, 4.21760599,…
## $ mean_neighbor_distance             <dbl> 137.22727, 88.85714, 113.37736, 128…
## $ mean_total_volume_of_neighbors     <dbl> 22.32435, 985.23836, 113.67033, 642…
## $ combined_total_volume_of_neighbors <dbl> 491.1357, 20690.0056, 6024.5275, 42…
## $ mean_live_volume_of_neighbors      <dbl> 22.31007, 976.43068, 92.82122, 387.…
## $ combined_live_volume_of_neighbors  <dbl> 490.8215, 20505.0444, 4919.5247, 25…
## $ neighbor_total_volume_density      <dbl> 39.08333, 1646.45833, 479.41667, 33…
## $ neighbor_live_volume_density       <dbl> 39.05833, 1631.73958, 391.48333, 20…
## $ notes                              <chr> "slurry not retained - proper mater…
## $ total_individuals                  <int> 13, 30, 42, 17, 58, 39, 37, 27, 30,…
## $ species_richness                   <dbl> 3, 5, 4, 3, 6, 5, 4, 4, 2, 3, 4, 4,…
## $ shannon                            <dbl> 0.6870920, 0.8661933, 0.8551727, 0.…
## $ simpson                            <dbl> 0.37869822, 0.43111111, 0.48412698,…
## $ pielou_evenness                    <dbl> 0.6254181, 0.5381961, 0.6168767, 0.…
## $ mean_cafi_size_mm                  <dbl> 37.500000, 30.000000, NA, NA, NA, N…
## $ median_cafi_size_mm                <dbl> 37.50, 30.00, NA, NA, NA, NA, 60.00…
## $ size_sd_mm                         <dbl> 3.535534, NA, NA, NA, NA, NA, NA, N…
## $ n_type_crab                        <int> 2, 9, 6, 7, 21, 4, 7, 12, 10, 14, 1…
## $ n_type_fish                        <int> 2, 3, 0, 0, 4, 0, 2, 4, 4, 6, 4, 2,…
## $ n_type_shrimp                      <int> 7, 14, 32, 8, 23, 28, 24, 7, 11, 14…
## $ n_type_snail                       <int> 0, 2, 2, 0, 3, 2, 3, 2, 1, 1, 2, 8,…
## $ n_type_squat_lobster               <int> 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0,…
## $ n_type_echinoderm                  <int> 1, 0, 0, 0, 0, 2, 1, 0, 0, 2, 3, 7,…
## $ n_type_hermit                      <int> 0, 1, 1, 0, 2, 2, 0, 2, 4, 10, 3, 6…
## $ n_type_worm                        <int> 1, 1, 1, 2, 3, 0, 0, 0, 0, 1, 0, 0,…
## $ n_type_amphipod                    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ n_type_amph                        <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,…
## $ n_tissue_samples                   <int> NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ mean_surface_area_cm2              <dbl> NA, 2181.660, 3428.570, 2387.836, 6…
## $ mean_protein_mg_cm2                <dbl> NA, 0.0065042216, 0.0055278148, 0.0…
## $ mean_carb_mg_cm2                   <dbl> NA, 0.0008617291, 0.0008181253, 0.0…
## $ mean_zoox_density_cells_ml         <dbl> NA, 223000.0, 1026666.7, 431666.7, …
## $ mean_zoox_cells_cm2                <dbl> NA, 3066.4723, 6737.5028, 2802.0489…
## $ mean_afdw_mg_cm2                   <dbl> NA, 0.13819752, 0.05282815, 0.08146…
## $ log_coral_volume                   <dbl> 0.000000, 3.549068, 0.000000, 3.167…
## $ log_total_individuals              <dbl> 1.146128, 1.491362, 1.633468, 1.255…
## $ log_live_neighbor_volume           <dbl> 2.691808, 4.311882, 3.692011, 4.407…
## $ log_neighbor_density               <dbl> 0.24321306, 0.22300969, 0.62506611,…

7 Colony Context & Summary Statistics

colony_summary <- survey_analytic_df %>%
  group_by(region, morphotype) %>%
  summarise(
    n_colonies = n(),
    median_volume_cm3 = median(coral_volume_cm3, na.rm = TRUE),
    iqr_volume_cm3 = IQR(coral_volume_cm3, na.rm = TRUE),
    median_neighbors = median(number_of_neighbors, na.rm = TRUE),
    median_total_individuals = median(total_individuals, na.rm = TRUE),
    q1_richness = quantile(species_richness, 0.25, na.rm = TRUE),
    median_richness = median(species_richness, na.rm = TRUE),
    q3_richness = quantile(species_richness, 0.75, na.rm = TRUE),
    .groups = "drop"
  )

knitr::kable(
  colony_summary,
  caption = "Focal colony context by region and morphotype"
)
Focal colony context by region and morphotype
region morphotype n_colonies median_volume_cm3 iqr_volume_cm3 median_neighbors median_total_individuals q1_richness median_richness q3_richness
MRB Tight branching 19 2450.442 3767.817 15.0 37.0 2 3 4.00
MRB Wide branching 18 3932.750 6094.035 8.5 34.0 2 3 3.75
HAU Tight branching 20 4063.126 4822.345 1.5 21.0 2 3 3.00
HAU Wide branching 18 6168.517 9861.852 1.5 22.0 2 3 3.00
MAT Tight branching 20 4368.908 3924.635 4.0 27.5 2 3 4.00
MAT Wide branching 19 3979.351 5596.747 4.0 21.0 2 3 3.00

8 Abundance & Diversity Patterns

8.1 CAFI Abundance by Region and Morphotype

p_abundance_region <- survey_analytic_df %>%
  ggplot(aes(x = region, y = total_individuals, fill = morphotype)) +
  geom_boxplot(alpha = 0.8, outlier.shape = 21, outlier.fill = "white") +
  scale_fill_manual(values = morphotype_palette, na.translate = FALSE) +
  scale_y_log10(labels = label_number(big.mark = ",")) +
  labs(
    x = "Backreef region",
    y = "Total CAFI individuals (log10 scale)",
    fill = "Morphotype",
    title = "CAFI abundance varies across regions and morphotypes"
  ) +
  plot_theme

p_abundance_neighbor <- survey_analytic_df %>%
  ggplot(aes(x = combined_live_volume_of_neighbors + 1, y = total_individuals, color = region)) +
  geom_point(alpha = 0.75, size = 2.5) +
  geom_smooth(method = "lm", se = TRUE, color = "black") +
  scale_color_manual(values = region_palette) +
  scale_x_log10(labels = label_number(scale_cut = cut_short_scale())) +
  scale_y_log10(labels = label_number(scale_cut = cut_short_scale())) +
  facet_wrap(~ morphotype) +
  labs(
    x = "Live neighbor Pocillopora volume (cm³, log scale)",
    y = "Total CAFI individuals (log scale)",
    color = "Region",
    title = "Higher live Pocillopora neighborhood volume is linked to richer CAFI communities"
  ) +
  plot_theme

p_abundance_region + p_abundance_neighbor + plot_layout(guides = "collect")

ggsave(
  filename = here(figure_dir, "cafi_abundance_regional_patterns.png"),
  plot = p_abundance_region,
  width = 7,
  height = 5,
  dpi = 300
)

ggsave(
  filename = here(figure_dir, "cafi_abundance_vs_neighbor_volume.png"),
  plot = p_abundance_neighbor,
  width = 7,
  height = 5,
  dpi = 300
)

8.2 Diversity Metrics

p_diversity <- survey_analytic_df %>%
  ggplot(aes(x = morphotype, y = shannon, fill = region)) +
  geom_boxplot(alpha = 0.9, outlier.shape = NA, width = 0.7) +
  geom_jitter(
    aes(color = region),
    width = 0.15,
    alpha = 0.6,
    size = 2
  ) +
  scale_fill_manual(values = region_palette) +
  scale_color_manual(values = region_palette) +
  labs(
    x = "Morphotype",
    y = "Shannon diversity",
    title = "CAFI diversity by morphotype and region"
  ) +
  plot_theme

p_evenness_volume <- survey_analytic_df %>%
  ggplot(aes(x = coral_volume_cm3 + 1, y = pielou_evenness, color = morphotype)) +
  geom_point(alpha = 0.75, size = 2.5) +
  geom_smooth(method = "loess", se = TRUE) +
  scale_color_manual(values = morphotype_palette) +
  scale_x_log10(labels = label_number(scale_cut = cut_short_scale())) +
  labs(
    x = "Coral colony volume (cm³, log scale)",
    y = "Pielou's evenness",
    color = "Morphotype",
    title = "Evenness is highest on intermediate-sized colonies"
  ) +
  plot_theme

p_diversity + p_evenness_volume + plot_layout(guides = "collect")

ggsave(
  filename = here(figure_dir, "cafi_diversity_summaries.png"),
  plot = p_diversity + p_evenness_volume + plot_layout(guides = "collect"),
  width = 10,
  height = 5,
  dpi = 300
)

8.3 Taxonomic Composition by Region

type_composition <- cafi %>%
  filter(!is.na(type)) %>%
  mutate(
    type_group = fct_lump_n(type, n = 8, other_level = "Other"),
    type_group = fct_reorder(type_group, type_group, .fun = length),
    region = fct_drop(region)
  ) %>%
  count(region, morphotype, type_group, name = "n") %>%
  group_by(region, morphotype) %>%
  mutate(
    prop = n / sum(n)
  ) %>%
  ungroup()

p_type_composition <- type_composition %>%
  ggplot(aes(x = region, y = prop, fill = type_group)) +
  geom_col(position = "stack", alpha = 0.9) +
  facet_wrap(~ morphotype, nrow = 1) +
  scale_y_continuous(labels = label_percent(accuracy = 1)) +
  labs(
    x = "Region",
    y = "Community share",
    fill = "Functional group",
    title = "CAFI functional composition differs by site and morphotype"
  ) +
  plot_theme

p_type_composition

ggsave(
  filename = here(figure_dir, "cafi_functional_composition.png"),
  plot = p_type_composition,
  width = 9,
  height = 5,
  dpi = 300
)

9 Statistical Models for Key Drivers

9.1 Negative Binomial Model – CAFI Abundance

count_model_df <- survey_analytic_df %>%
  filter(
    coral_volume_cm3 > 0,
    combined_live_volume_of_neighbors >= 0,
    !is.na(morphotype),
    !is.na(region)
  ) %>%
  mutate(
    scaled_log_coral_volume = as.numeric(scale(log10(coral_volume_cm3))),
    scaled_log_live_neighbor_volume = as.numeric(scale(log10(combined_live_volume_of_neighbors + 1))),
    scaled_neighbor_density = as.numeric(scale(neighbor_density))
  )

count_model <- MASS::glm.nb(
  total_individuals ~ region + morphotype +
    scaled_log_coral_volume +
    scaled_log_live_neighbor_volume +
    scaled_neighbor_density,
  data = count_model_df
)

count_model_tidy <- broom::tidy(
  count_model,
  exponentiate = TRUE,
  conf.int = TRUE
) %>%
  mutate(across(where(is.numeric), ~round(.x, 3)))

count_model_glance <- broom::glance(count_model)

knitr::kable(
  count_model_tidy,
  caption = "Negative binomial model of CAFI abundance (rate ratios)"
)
Negative binomial model of CAFI abundance (rate ratios)
term estimate std.error statistic p.value conf.low conf.high
(Intercept) 38.144 0.107 33.925 0.000 31.016 47.278
regionHAU 0.734 0.137 -2.255 0.024 0.561 0.960
regionMAT 0.845 0.134 -1.256 0.209 0.648 1.102
morphotypeWide branching 0.738 0.102 -2.972 0.003 0.605 0.901
scaled_log_coral_volume 2.113 0.057 13.042 0.000 1.889 2.366
scaled_log_live_neighbor_volume 0.952 0.073 -0.680 0.497 0.820 1.105
scaled_neighbor_density 1.061 0.075 0.796 0.426 0.902 1.260

9.2 Linear Model – Shannon Diversity

diversity_model_df <- survey_analytic_df %>%
  filter(
    coral_volume_cm3 > 0,
    !is.na(morphotype),
    !is.na(region),
    !is.na(mean_protein_mg_cm2)
  ) %>%
  mutate(
    scaled_log_coral_volume = as.numeric(scale(log10(coral_volume_cm3))),
    scaled_protein = as.numeric(scale(mean_protein_mg_cm2)),
    scaled_log_live_neighbor_volume = as.numeric(scale(log10(combined_live_volume_of_neighbors + 1)))
  )

diversity_model <- lm(
  shannon ~ region + morphotype +
    scaled_log_coral_volume +
    scaled_log_live_neighbor_volume +
    scaled_protein,
  data = diversity_model_df
)

diversity_model_tidy <- broom::tidy(
  diversity_model,
  conf.int = TRUE
) %>%
  mutate(across(where(is.numeric), ~round(.x, 3)))

diversity_model_glance <- broom::glance(diversity_model)

knitr::kable(
  diversity_model_tidy,
  caption = "Linear model of Shannon diversity"
)
Linear model of Shannon diversity
term estimate std.error statistic p.value conf.low conf.high
(Intercept) 0.531 0.063 8.435 0.000 0.406 0.656
regionHAU -0.072 0.073 -0.983 0.328 -0.218 0.074
regionMAT -0.096 0.074 -1.297 0.198 -0.242 0.051
morphotypeWide branching 0.053 0.064 0.827 0.410 -0.074 0.179
scaled_log_coral_volume 0.064 0.031 2.095 0.039 0.003 0.125
scaled_log_live_neighbor_volume 0.006 0.031 0.188 0.851 -0.055 0.066
scaled_protein 0.043 0.032 1.333 0.186 -0.021 0.107

10 Multivariate Community Structure

10.1 NMDS Ordination

valid_rows <- total_individuals_vec > 0
community_for_ord <- community_matrix[valid_rows, ]

nmds_df <- survey_analytic_df %>%
  filter(coral_id %in% rownames(community_for_ord)) %>%
  select(coral_id, region, morphotype, log_coral_volume, log_live_neighbor_volume) %>%
  mutate(
    scaled_log_coral_volume = as.numeric(scale(log_coral_volume)),
    scaled_log_live_neighbor_volume = as.numeric(scale(log_live_neighbor_volume))
  ) %>%
  column_to_rownames("coral_id")

community_for_ord <- community_for_ord[rownames(nmds_df), ]

set.seed(2025)
nmds_fit <- vegan::metaMDS(
  community_for_ord,
  distance = "bray",
  k = 2,
  trymax = 200,
  autotransform = FALSE
)
## Run 0 stress 0.08522801 
## Run 1 stress 0.09962531 
## Run 2 stress 0.09124473 
## Run 3 stress 0.104316 
## Run 4 stress 0.09165996 
## Run 5 stress 0.09136372 
## Run 6 stress 0.09618744 
## Run 7 stress 0.0996444 
## Run 8 stress 0.1007685 
## Run 9 stress 0.08625413 
## Run 10 stress 0.07103067 
## ... New best solution
## ... Procrustes: rmse 0.01563212  max resid 0.09477906 
## Run 11 stress 0.07100285 
## ... New best solution
## ... Procrustes: rmse 0.003218389  max resid 0.0236027 
## Run 12 stress 0.0974593 
## Run 13 stress 0.08604393 
## Run 14 stress 0.0793779 
## Run 15 stress 0.09241526 
## Run 16 stress 0.09576412 
## Run 17 stress 0.08490274 
## Run 18 stress 0.06944839 
## ... New best solution
## ... Procrustes: rmse 0.01578052  max resid 0.1402104 
## Run 19 stress 0.08406536 
## Run 20 stress 0.09809897 
## Run 21 stress 0.09611493 
## Run 22 stress 0.08785304 
## Run 23 stress 0.09649643 
## Run 24 stress 0.08864346 
## Run 25 stress 0.07801675 
## Run 26 stress 0.09536803 
## Run 27 stress 0.08590269 
## Run 28 stress 0.09010298 
## Run 29 stress 0.0967086 
## Run 30 stress 0.07057654 
## Run 31 stress 0.09348961 
## Run 32 stress 0.09878032 
## Run 33 stress 0.09292848 
## Run 34 stress 0.07057684 
## Run 35 stress 0.07974323 
## Run 36 stress 0.09274154 
## Run 37 stress 0.09243711 
## Run 38 stress 0.0992964 
## Run 39 stress 0.09365048 
## Run 40 stress 0.08231855 
## Run 41 stress 0.08353018 
## Run 42 stress 0.08818121 
## Run 43 stress 0.09802528 
## Run 44 stress 0.09243676 
## Run 45 stress 0.07801679 
## Run 46 stress 0.07927319 
## Run 47 stress 0.07937816 
## Run 48 stress 0.09645109 
## Run 49 stress 0.1009254 
## Run 50 stress 0.08094626 
## Run 51 stress 0.1057153 
## Run 52 stress 0.08349488 
## Run 53 stress 0.09668046 
## Run 54 stress 0.09669814 
## Run 55 stress 0.07057662 
## Run 56 stress 0.09409087 
## Run 57 stress 0.07969821 
## Run 58 stress 0.07974273 
## Run 59 stress 0.08778661 
## Run 60 stress 0.08094586 
## Run 61 stress 0.07100226 
## Run 62 stress 0.10258 
## Run 63 stress 0.09125524 
## Run 64 stress 0.09169144 
## Run 65 stress 0.09942895 
## Run 66 stress 0.09188527 
## Run 67 stress 0.0900846 
## Run 68 stress 0.09573424 
## Run 69 stress 0.07794443 
## Run 70 stress 0.1016823 
## Run 71 stress 0.07057662 
## Run 72 stress 0.09234937 
## Run 73 stress 0.09833703 
## Run 74 stress 0.08546521 
## Run 75 stress 0.08920936 
## Run 76 stress 0.1022597 
## Run 77 stress 0.09627738 
## Run 78 stress 0.08984352 
## Run 79 stress 0.08203874 
## Run 80 stress 0.08934162 
## Run 81 stress 0.06944183 
## ... New best solution
## ... Procrustes: rmse 0.0008549897  max resid 0.00476795 
## ... Similar to previous best
## *** Best solution repeated 1 times
nmds_scores <- vegan::scores(nmds_fit, display = "sites") %>%
  as_tibble(rownames = "coral_id") %>%
  left_join(
    survey_analytic_df %>%
      select(coral_id, region, morphotype, log_coral_volume, log_live_neighbor_volume),
    by = "coral_id"
  )

p_nmds <- nmds_scores %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = region, shape = morphotype, size = log_coral_volume)) +
  geom_point(alpha = 0.85) +
  scale_color_manual(values = region_palette) +
  scale_size_continuous(range = c(2, 6)) +
  coord_equal() +
  labs(
    x = "NMDS axis 1",
    y = "NMDS axis 2",
    color = "Region",
    shape = "Morphotype",
    size = "log10 colony volume",
    title = "NMDS ordination of CAFI community composition (Bray-Curtis)"
  ) +
  plot_theme

p_nmds

ggsave(
  filename = here(figure_dir, "cafi_ordination_nmds.png"),
  plot = p_nmds,
  width = 7,
  height = 6,
  dpi = 300
)

10.2 PERMANOVA

permanova_df <- survey_analytic_df %>%
  filter(coral_id %in% rownames(community_for_ord)) %>%
  mutate(
    scaled_log_coral_volume = as.numeric(scale(log_coral_volume)),
    scaled_log_live_neighbor_volume = as.numeric(scale(log_live_neighbor_volume))
  ) %>%
  drop_na(region, morphotype, scaled_log_coral_volume, scaled_log_live_neighbor_volume) %>%
  select(coral_id, region, morphotype, scaled_log_coral_volume, scaled_log_live_neighbor_volume) %>%
  column_to_rownames("coral_id")

community_for_perm <- community_for_ord[rownames(permanova_df), ]

if (nrow(permanova_df) >= 3) {
  set.seed(2025)
  permanova <- vegan::adonis2(
    community_for_perm ~ region + morphotype +
      scaled_log_coral_volume + scaled_log_live_neighbor_volume,
    data = permanova_df,
    permutations = 999,
    method = "bray"
  )

  permanova_tbl <- permanova %>%
    as.data.frame() %>%
    rownames_to_column("term")

  knitr::kable(
    permanova_tbl,
    digits = 3,
    caption = "PERMANOVA results for CAFI community structure"
  )
} else {
  permanova_tbl <- tibble(
    term = character(),
    SumOfSqs = numeric(),
    R2 = numeric(),
    F = numeric(),
    Pr = numeric()
  )
  message("PERMANOVA skipped: fewer than 3 colonies with complete data.")
}
PERMANOVA results for CAFI community structure
term Df SumOfSqs R2 F Pr(>F)
Model 5 4.754 0.292 8.898 0.001
Residual 108 11.541 0.708 NA NA
Total 113 16.295 1.000 NA NA

11 Export Key Outputs

readr::write_csv(
  survey_analytic_df,
  here(table_dir, "survey_analytic_dataset.csv")
)

readr::write_csv(
  colony_summary,
  here(table_dir, "colony_summary_by_region_morphotype.csv")
)

readr::write_csv(
  count_model_tidy,
  here(table_dir, "model_cafi_abundance_nb.csv")
)

readr::write_csv(
  diversity_model_tidy,
  here(table_dir, "model_shannon_linear.csv")
)

readr::write_csv(
  permanova_tbl,
  here(table_dir, "permanova_cafi_composition.csv")
)

readr::write_csv(
  type_composition,
  here(table_dir, "cafi_functional_composition_by_region.csv")
)

12 Notes & Next Steps

  • Verify model assumptions through posterior predictive checks (e.g., DHARMa for the negative binomial model) before publication.
  • Integrate fish biomass conversions when length–weight coefficients become available to extend functional analyses beyond counts.
  • Consider spatial autocorrelation tests if precise colony coordinates are available (currently only distances to neighbors are used).

13 Session Info

sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.0.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] C.UTF-8/C.UTF-8/C.UTF-8/C/C.UTF-8/C.UTF-8
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] MASS_7.3-65      scales_1.4.0     patchwork_1.3.1  broom_1.0.9     
##  [5] vegan_2.7-1      permute_0.9-8    fs_1.6.6         glue_1.8.0      
##  [9] janitor_2.2.1    lubridate_1.9.4  forcats_1.0.0    stringr_1.5.1   
## [13] dplyr_1.1.4      purrr_1.1.0      readr_2.1.5      tidyr_1.3.1     
## [17] tibble_3.3.0     ggplot2_3.5.2    tidyverse_2.0.0  here_1.0.1      
## [21] conflicted_1.2.0
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6       xfun_0.53          bslib_0.9.0        lattice_0.22-7    
##  [5] tzdb_0.5.0         vctrs_0.6.5        tools_4.5.1        generics_0.1.4    
##  [9] parallel_4.5.1     cluster_2.1.8.1    pkgconfig_2.0.3    Matrix_1.7-3      
## [13] RColorBrewer_1.1-3 lifecycle_1.0.4    compiler_4.5.1     farver_2.1.2      
## [17] textshaping_1.0.1  snakecase_0.11.1   htmltools_0.5.8.1  sass_0.4.10       
## [21] yaml_2.3.10        pillar_1.11.0      crayon_1.5.3       jquerylib_0.1.4   
## [25] cachem_1.1.0       nlme_3.1-168       tidyselect_1.2.1   digest_0.6.37     
## [29] stringi_1.8.7      labeling_0.4.3     splines_4.5.1      rprojroot_2.1.0   
## [33] fastmap_1.2.0      grid_4.5.1         cli_3.6.5          magrittr_2.0.3    
## [37] withr_3.0.2        backports_1.5.0    bit64_4.6.0-1      timechange_0.3.0  
## [41] rmarkdown_2.29     bit_4.6.0          ragg_1.4.0         hms_1.1.3         
## [45] memoise_2.0.1      evaluate_1.0.4     knitr_1.50         mgcv_1.9-3        
## [49] rlang_1.1.6        vroom_1.6.5        jsonlite_2.0.0     R6_2.6.1          
## [53] systemfonts_1.2.3